#!/usr/bin/env python

# $LastChangedBy: ben.trettel $
# $LastChangedRevision: 9 $
# $LastChangedDate: 2013-01-20 03:44:53 +0000 (Sun, 20 Jan 2013) $
# $HeadURL: http://medos.googlecode.com/svn/trunk/src/importers/FitDay.py $

from HTMLParser import HTMLParser
from urllib2 import urlopen,Request,build_opener,HTTPCookieProcessor,install_opener
from urllib import urlencode,quote_plus
from cookielib import LWPCookieJar
from BeautifulSoup import BeautifulSoup,UnicodeDammit
import time
import datetime
from string import lower,upper,join,strip
import csv

# File the session cookies are written to once the login request completes.
COOKIE_FILE = "cookie"
url = 'https://www.fitday.com/fitness/Login.html'
# NOTE(review): the credentials are blank here; presumably they have to be
# filled in before the script is run.
values = {'LoginName' : '',
          'Password' : ''}

# Log in to FitDay
# The opener is installed globally so that every later urlopen() call sends
# and stores cookies through cookieJar.
cookieJar = LWPCookieJar()
opener = build_opener(HTTPCookieProcessor(cookieJar))
install_opener(opener)

data = urlencode(values)
# Passing data makes urllib2 issue a POST with the form-encoded credentials.
req = Request(url, data)

# Submit the login form exactly once.  Opening the same request a second time
# would re-send the credentials and leave the first response unclosed.
response = urlopen(req)
try:
    htmlSource = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()
cookieJar.save(COOKIE_FILE)

# Clean up the raw login response before handing it to the parser.
# NOTE(review): the first 11 characters are discarded and backslash-escaped
# quotes, newlines and tabs are unescaped -- presumably the HTML arrives
# wrapped in some kind of escaped string; confirm against a live response.
htmlSource = htmlSource[11:]
for escaped, plain in (('\\"', '"'), ('\\n', '\n'), ('\\t', '\t')):
    htmlSource = htmlSource.replace(escaped, plain)

soup = BeautifulSoup(htmlSource)

# Strip the <script> elements; the JS just gets in the way.
to_extract = soup.findAll('script')
for script_tag in to_extract:
    script_tag.extract()

titles = soup.findAll('title')

# The login counts as successful when any <title> on the returned page
# matches the "Foods Eaten" page title.
logged_in = any(
    str(page_title.string).strip()
    == 'FitDay Free Calorie Counter and Diet Journal:&nbsp;Foods Eaten'
    for page_title in titles
)

if not logged_in:
    raise Exception('Could not log in.')

print('Logged in to FitDay. Saved cookie to ' + COOKIE_FILE + '.')

# Download weight data
#
# Walks the weight-history pages starting at WeightHistory.html, printing one
# "<date>\t<weight>" line per table row, and follows the link inside the
# "Prev" div until a page has none.
nexturl = 'https://www.fitday.com/fitness/WeightHistory.html'

# Retry policy for fetching a page: give up after this many failed attempts
# and pause between attempts instead of hammering the server in a tight loop.
MAX_FETCH_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 2

while nexturl != '':
    # Load the cookie and visit the URL of interest.
    cookieJar.load(COOKIE_FILE)

    attempt = 0
    while True:
        attempt += 1
        try:
            page = urlopen(nexturl)
            break
        except Exception:
            # Catching Exception (not a bare except) keeps Ctrl-C and
            # SystemExit working; the last failure is re-raised so a
            # permanently broken URL cannot loop forever.
            if attempt >= MAX_FETCH_ATTEMPTS:
                raise
            print("Error reading the URL. Trying again...")
            time.sleep(RETRY_DELAY_SECONDS)

    try:
        htmlSource = page.read()
    finally:
        # Release the connection even if reading the body fails.
        page.close()

    # Filter the result a bit.
    # NOTE(review): the first 11 characters are discarded and escaped
    # quotes/newlines/tabs are unescaped -- presumably the HTML arrives
    # wrapped in an escaped string; confirm against a live response.
    htmlSource = htmlSource[11:]
    htmlSource = htmlSource.replace('\\"', '"')
    htmlSource = htmlSource.replace('\\n', '\n')
    htmlSource = htmlSource.replace('\\t', '\t')

    soup = BeautifulSoup(htmlSource)

    # removing JS
    for item in soup.findAll('script'):
        item.extract()

    # find() returns None when nothing matches, so check before dereferencing
    # to fail with a readable message instead of an AttributeError.
    list_view = soup.find('div', {'class': 'ListView'})
    if list_view is None or list_view.table is None:
        raise Exception('Could not find the weight table at ' + nexturl)
    weight_table = list_view.table

    # Query the rows once; the first row is skipped (presumably the header).
    rows = weight_table('tr')
    for weight_row in rows[1:]:
        cells = weight_row('td')
        # First cell holds the date, second the weight; each value is the
        # text of the first <div> inside its cell.
        weight_date = cells[0]('div')[0].string
        weight = cells[1]('div')[0].string

        print(weight_date + '\t' + weight)

    # The link inside the "Prev" div leads to the next page to scrape; a
    # missing div or a div without a link means there are no more pages.
    next_div = soup.find('div', {'class': 'Prev'})
    next_a = next_div.a if next_div is not None else None

    if next_a is None:
        nexturl = ''
    else:
        nexturl = 'https://www.fitday.com/fitness/' + next_a['href']
